library("phyloseq")
library("vegan")
library("DESeq2")
library("ggplot2")
library("dendextend")
library("tidyr")
library("viridis")
library("reshape")
library("metacoder")
library("dplyr")
library('readr')
library('stringr')
library('agricolae')
library('ape')
library('ggrepel')
library("RColorBrewer")
library('rstatix')
library('microeco')
library('ggalluvial')
library('iNEXT')
library('ggvenn')
library('gridExtra')
sessionInfo()
## R version 4.4.2 (2024-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_Germany.utf8  LC_CTYPE=English_Germany.utf8   
## [3] LC_MONETARY=English_Germany.utf8 LC_NUMERIC=C                    
## [5] LC_TIME=English_Germany.utf8    
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3               ggvenn_0.1.10              
##  [3] iNEXT_3.0.1                 ggalluvial_0.12.5          
##  [5] microeco_1.12.0             rstatix_0.7.2              
##  [7] RColorBrewer_1.1-3          ggrepel_0.9.6              
##  [9] ape_5.8-1                   agricolae_1.3-7            
## [11] stringr_1.5.1               readr_2.1.5                
## [13] dplyr_1.1.4                 metacoder_0.3.7            
## [15] reshape_0.8.9               viridis_0.6.5              
## [17] viridisLite_0.4.2           tidyr_1.3.1                
## [19] dendextend_1.19.0           ggplot2_3.5.1              
## [21] DESeq2_1.46.0               SummarizedExperiment_1.36.0
## [23] Biobase_2.66.0              MatrixGenerics_1.18.1      
## [25] matrixStats_1.4.1           GenomicRanges_1.58.0       
## [27] GenomeInfoDb_1.42.1         IRanges_2.40.1             
## [29] S4Vectors_0.44.0            BiocGenerics_0.52.0        
## [31] vegan_2.6-8                 lattice_0.22-6             
## [33] permute_0.9-7               phyloseq_1.50.0            
## 
## loaded via a namespace (and not attached):
##  [1] rlang_1.1.4             magrittr_2.0.3          ade4_1.7-22            
##  [4] compiler_4.4.2          mgcv_1.9-1              vctrs_0.6.5            
##  [7] reshape2_1.4.4          pkgconfig_2.0.3         crayon_1.5.3           
## [10] fastmap_1.2.0           backports_1.5.0         XVector_0.46.0         
## [13] rmarkdown_2.29          tzdb_0.4.0              UCSC.utils_1.2.0       
## [16] purrr_1.0.2             xfun_0.49               zlibbioc_1.52.0        
## [19] cachem_1.1.0            jsonlite_1.8.9          biomformat_1.34.0      
## [22] rhdf5filters_1.18.0     DelayedArray_0.32.0     Rhdf5lib_1.28.0        
## [25] BiocParallel_1.40.0     broom_1.0.7             parallel_4.4.2         
## [28] cluster_2.1.8           R6_2.5.1                bslib_0.8.0            
## [31] stringi_1.8.4           car_3.1-3               jquerylib_0.1.4        
## [34] Rcpp_1.0.13-1           iterators_1.0.14        knitr_1.49             
## [37] Matrix_1.7-1            splines_4.4.2           igraph_2.1.2           
## [40] tidyselect_1.2.1        rstudioapi_0.17.1       abind_1.4-8            
## [43] yaml_2.3.10             AlgDesign_1.2.1.1       codetools_0.2-20       
## [46] tibble_3.2.1            plyr_1.8.9              withr_3.0.2            
## [49] evaluate_1.0.3          survival_3.8-3          Biostrings_2.74.1      
## [52] pillar_1.10.1           carData_3.0-5           foreach_1.5.2          
## [55] generics_0.1.3          hms_1.1.3               munsell_0.5.1          
## [58] scales_1.3.0            glue_1.8.0              tools_4.4.2            
## [61] data.table_1.16.4       locfit_1.5-9.10         rhdf5_2.50.2           
## [64] colorspace_2.1-1        nlme_3.1-166            GenomeInfoDbData_1.2.13
## [67] Formula_1.2-5           cli_3.6.3               S4Arrays_1.6.0         
## [70] gtable_0.3.6            sass_0.4.9              digest_0.6.37          
## [73] SparseArray_1.6.0       htmltools_0.5.8.1       multtest_2.62.0        
## [76] lifecycle_1.0.4         httr_1.4.7              MASS_7.3-64

Reading the data

#reading the data
count_tab <-read.table("Raw_Counts.txt", header=T, row.names=1, check.names=F)
sample_info <- read.table("Sample_Info.txt", header=T, row.names=1, check.names=F)

tax_tab<- read.delim("Taxonomy.txt")
tax_tab$OTU[duplicated(tax_tab$OTU)] # check for duplicated OTUs
## character(0)
count_tab <- count_tab[rownames(count_tab) %in% tax_tab$OTU, ]

Treatment of NTC

# sum of NTC counts for each OTU
NTC <-rowSums(count_tab[, 1:6]) 

# sum of sample counts for each OTU
sample_counts <- rowSums(count_tab[, 7:90])

# normalize sample counts by dividing the samples' total by 12 – as there are 12 as many samples (84) as there are NTCs (6) (84/6=14)
norm_sample_OTU_counts <- sample_counts/14

# which OTUs are deemed likely contaminants based on the threshold noted above:
blank_OTUs <- names(NTC[NTC * 2  > norm_sample_OTU_counts]) # OTUs which have more counts than 1/2 of the sample counts 
length(blank_OTUs) # this approach identified  4 out of ~283 OTUs that are likely to have originated from contamination
## [1] 4
# looking at the percentage of reads retained for each sample after removing these presumed contaminant OTUs 
colSums(count_tab[!rownames(count_tab) %in% blank_OTUs, ]) / colSums(count_tab) * 100
##       NTC       NTC       NTC       NTC       NTC       NTC        P2       D26 
##  47.33096 100.00000  42.47967 100.00000 100.00000  49.33333 100.00000  99.99846 
##       P27        P6        D6        V6        S6        V4        P9       S22 
## 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000 
##        D2      P30A        V9       S13      S30A        D9       D13       V13 
## 100.00000 100.00000 100.00000  99.99891  99.99435 100.00000 100.00000 100.00000 
##       V23       V17       V16       D16       S17       P22       P17       D17 
## 100.00000 100.00000 100.00000 100.00000 100.00000  99.99788 100.00000  99.99230 
##        S9       P10       D22        S4        P5       P23       D23        V2 
## 100.00000 100.00000 100.00000 100.00000 100.00000 100.00000  99.99784 100.00000 
##      V2_3      S2_2       D24       V24        V5       S24       P26       S31 
## 100.00000  99.99878 100.00000 100.00000 100.00000 100.00000 100.00000  99.99889 
##       S27        S2       V27       S26       V10      V2_2      V30B       V78 
## 100.00000 100.00000 100.00000 100.00000  99.95886 100.00000 100.00000 100.00000 
##      V30A       P16      D30B       S10      D30A       S78       D31       P13 
## 100.00000 100.00000 100.00000 100.00000  99.99615 100.00000 100.00000 100.00000 
##        D4        D5       P76       D76       V76      S30B       P31       D78 
##  99.99815 100.00000 100.00000 100.00000 100.00000  99.99704 100.00000 100.00000 
##       S76       P78        P4      P2_2      P2_3      D2_2       D27       S23 
##  99.99370 100.00000 100.00000 100.00000 100.00000  99.99735 100.00000 100.00000 
##       P24      S2_3       V31      P30B       V26       V22       D10       S16 
## 100.00000 100.00000 100.00000 100.00000 100.00000  99.97795  99.99355  99.99753 
##      D2_3        S5 
##  99.99515 100.00000
filt_count_tab <- count_tab[!rownames(count_tab) %in% blank_OTUs, -c(1:6)] # removing blank_OTUs and the blank samples from further analysis
filt_count_tab <- filt_count_tab[rowSums(filt_count_tab == 0) != ncol(filt_count_tab), ] # delete OTUs with 0 counts -> 278 OTUs left

Normalizing counts for sampling depth (variance stabilizing transformation in DEseq2 package)

#reorder the row and column names
sample_info<-sample_info[order(rownames(sample_info)), ]
filt_count_tab<- filt_count_tab[,order(names(filt_count_tab))]

#add 1 to every count, so it is possible to calculate the geometric mean later
filt_count_tab1 <- filt_count_tab + 1 

# create a DESeq2 object
deseq_counts <- DESeqDataSetFromMatrix(filt_count_tab1, colData = sample_info, design = ~Species) 
## converting counts to integer mode
deseq_counts_vst <- varianceStabilizingTransformation(deseq_counts, fitType='local')

#save as a matrix
vst_trans_count_tab <- assay(deseq_counts_vst)

#transform lowest values (absent counts) to 0
vst_trans_count_tab <- sweep(vst_trans_count_tab, 2, apply(vst_trans_count_tab, 2, min), "-") 

Add colors specific to species to the sample info table

sample_info$color[sample_info$Species == "Pinus"] <- "dodgerblue3"
sample_info$color[sample_info$Species == "Diphasiastrum"] <- "darkorange1"
sample_info$color[sample_info$Species == "Vaccinium"] <- "purple4"
sample_info$color[sample_info$Species == "Soil"] <- "hotpink2"

sample_info_no_rep <- read.table("Sample_Info_noR.txt",header=T, row.names=1, check.names=F)
sample_info_no_rep<-sample_info_no_rep[order(rownames(sample_info_no_rep)), ]
sample_info_no_rep$color[sample_info_no_rep$Species == "Pinus"] <- "dodgerblue3"
sample_info_no_rep$color[sample_info_no_rep$Species == "Diphasiastrum"] <- "darkorange1"
sample_info_no_rep$color[sample_info_no_rep$Species == "Vaccinium"] <- "purple4"
sample_info_no_rep$color[sample_info_no_rep$Species == "Soil"] <- "hotpink2"

Venn diagram: presence of OTUs in the sample type

#remove technical replicates:
Venn_OTU <- as.data.frame(vst_trans_count_tab)
Venn_OTU <- (Venn_OTU[, !grepl("_", colnames(Venn_OTU))])

Venn_S <- grepl("S", names(Venn_OTU))
OTU_S <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_S]) != 0]

Venn_P <- grepl("P", names(Venn_OTU))
OTU_P <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_P]) != 0]

Venn_V <- grepl("V", names(Venn_OTU))
OTU_V <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_V]) != 0]

Venn_D <- grepl("D", names(Venn_OTU))
OTU_D <- rownames(Venn_OTU)[rowSums(Venn_OTU[, Venn_V]) != 0]


venn <- list("Soil" = OTU_S, "PINsyl" = OTU_P, "VACmyr" = OTU_V, "DIPHcom" = OTU_D)
  
ggvenn(venn, fill_color = c("hotpink2", "dodgerblue3","purple4","darkorange1"),set_name_size = 4)

#write the table to use it in jvenn tool: https://jvenn.toulouse.inrae.fr/app/index.html
#max_length <- max(length(OTU_S), length(OTU_P), length(OTU_V), length(OTU_D))
#OTU_S <- c(OTU_S, rep(NA, max_length - length(OTU_S)))
#OTU_P <- c(OTU_P, rep(NA, max_length - length(OTU_P)))
#OTU_V <- c(OTU_V, rep(NA, max_length - length(OTU_V)))
#OTU_D <- c(OTU_D, rep(NA, max_length - length(OTU_D)))
#Venn_df <- data.frame(OTU_S, OTU_P, OTU_V, OTU_D)

#write_tsv(Venn_df, "venn")

Ordination Plots with package phyloseq (https://joey711.github.io/phyloseq/index.html)

PCoA: all OTUs, with technical replicates

# create phyloseq object with normalized data
vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info)
vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)

#generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance="euclidean")
eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples

pca<-plot_ordination(vst_physeq, vst_pcoa, color="Species") + 
  labs(col="Species") + geom_point(size=1) + 
  geom_text_repel(aes(label=rownames(sample_info)), size =3, force = 5, max.overlaps = 50) + 
  coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA") + 
  scale_color_manual(values=unique(sample_info$color[order(sample_info$Species)])) + 
  theme(legend.position="none")
pca

Samples labelled as “S2”, “S2_2” and “S2_3” - three technical replicates of the soil sample S2. Same for D2, P2 and V2.

PCoA - all OTUs, without technical replicates

vst_count_phy <- otu_table(vst_trans_count_tab, taxa_are_rows=T) #count data
sample_info_tab_phy <- sample_data(sample_info_no_rep) # sample data

tax_tab_physeq_all <- tax_tab[tax_tab$OTU %in% rownames(vst_trans_count_tab), ]
tax_tab_physeq_matrix <- as.matrix(tax_tab_physeq_all[, c("OTU", "Phylum", "Class", "Order")])
rownames(tax_tab_physeq_matrix) <- tax_tab_physeq_all[[1]]
tax_matrix<- tax_table(tax_tab_physeq_matrix) # OTU data
vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy, tax_matrix)

vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance = "euclidean")
eigen_vals <- vst_pcoa$values$Eigenvalues 

pcoa<-plot_ordination(vst_physeq, vst_pcoa, color="Species") + 
  labs(col="Species") + geom_point(size=1) + 
  coord_fixed(sqrt(eigen_vals[2]/eigen_vals[1])) + ggtitle("PCoA: All OTUs") +
  scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)])) 

NMDS: Sample Type - all OTUs, without technical replicates

vst_pcoa <- ordinate(vst_physeq, method = "NMDS", distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.197018 
## Run 1 stress 0.1979774 
## Run 2 stress 0.1990485 
## Run 3 stress 0.2008448 
## Run 4 stress 0.1968931 
## ... New best solution
## ... Procrustes: rmse 0.01108067  max resid 0.06254305 
## Run 5 stress 0.1990901 
## Run 6 stress 0.1977946 
## Run 7 stress 0.1979634 
## Run 8 stress 0.2215886 
## Run 9 stress 0.1979773 
## Run 10 stress 0.1990486 
## Run 11 stress 0.2215838 
## Run 12 stress 0.1970252 
## ... Procrustes: rmse 0.01105955  max resid 0.06280142 
## Run 13 stress 0.2217747 
## Run 14 stress 0.1968822 
## ... New best solution
## ... Procrustes: rmse 0.001338591  max resid 0.006189816 
## ... Similar to previous best
## Run 15 stress 0.2127896 
## Run 16 stress 0.2127939 
## Run 17 stress 0.1990562 
## Run 18 stress 0.1970063 
## ... Procrustes: rmse 0.004348012  max resid 0.03342215 
## Run 19 stress 0.22015 
## Run 20 stress 0.2252388 
## *** Best solution repeated 1 times
nmds<- plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: All OTUs", type ="sample") +
  scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)]))

NMDS: Sample types - only OTUs of Ectomycorrhizal (ECM) fungi

# select only ECM OTUs:
ECM_OTUs <- tax_tab_physeq_all$OTU[tax_tab_physeq_all$Guild %in% c("Ectomycorrhizal")]
ECM_vst_trans_count_tab <- vst_trans_count_tab[ECM_OTUs, ]


vst_count_phy <- otu_table(ECM_vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_no_rep)

vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy)
vst_pcoa <- ordinate(vst_physeq, method = "NMDS",distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.2822805 
## Run 1 stress 0.3115204 
## Run 2 stress 0.2878505 
## Run 3 stress 0.2863862 
## Run 4 stress 0.2811718 
## ... New best solution
## ... Procrustes: rmse 0.08256871  max resid 0.2862336 
## Run 5 stress 0.2919735 
## Run 6 stress 0.2898905 
## Run 7 stress 0.2918868 
## Run 8 stress 0.2851065 
## Run 9 stress 0.2985755 
## Run 10 stress 0.2806006 
## ... New best solution
## ... Procrustes: rmse 0.03352879  max resid 0.1503356 
## Run 11 stress 0.2882571 
## Run 12 stress 0.2897858 
## Run 13 stress 0.2824357 
## Run 14 stress 0.29074 
## Run 15 stress 0.2882046 
## Run 16 stress 0.2842404 
## Run 17 stress 0.2914153 
## Run 18 stress 0.2972009 
## Run 19 stress 0.2950679 
## Run 20 stress 0.2869468 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      2: no. of iterations >= maxit
##     18: stress ratio > sratmax
nmds_ECM<-plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: Ectomycorrhizal", type = "sample")+
  scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)])) 

NMDS: Sample types - only OTUs of Saprotrophic fungi

# select only Saprotrophic OTUs:
Sapro_OTUs <- tax_tab_physeq_all$OTU[tax_tab_physeq_all$Guild %in% c("Saprotroph")]
Sapro_vst_trans_count_tab <- vst_trans_count_tab[Sapro_OTUs, ]

vst_count_phy <- otu_table(Sapro_vst_trans_count_tab, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(sample_info_no_rep)

vst_physeq <- merge_phyloseq(vst_count_phy, sample_info_tab_phy)

#generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method = "NMDS",distance = "bray")
## Wisconsin double standardization
## Run 0 stress 0.2039362 
## Run 1 stress 0.2158043 
## Run 2 stress 0.2038225 
## ... New best solution
## ... Procrustes: rmse 0.006115275  max resid 0.03270896 
## Run 3 stress 0.2038152 
## ... New best solution
## ... Procrustes: rmse 0.0003874135  max resid 0.001638865 
## ... Similar to previous best
## Run 4 stress 0.2277129 
## Run 5 stress 0.206158 
## Run 6 stress 0.2039516 
## ... Procrustes: rmse 0.006299264  max resid 0.03366575 
## Run 7 stress 0.204362 
## Run 8 stress 0.2061578 
## Run 9 stress 0.2037455 
## ... New best solution
## ... Procrustes: rmse 0.003662047  max resid 0.02857684 
## Run 10 stress 0.2057254 
## Run 11 stress 0.2061594 
## Run 12 stress 0.2275107 
## Run 13 stress 0.2160111 
## Run 14 stress 0.2328631 
## Run 15 stress 0.2038152 
## ... Procrustes: rmse 0.003662233  max resid 0.02856119 
## Run 16 stress 0.2040974 
## ... Procrustes: rmse 0.006520555  max resid 0.04329109 
## Run 17 stress 0.2156937 
## Run 18 stress 0.2059085 
## Run 19 stress 0.2037454 
## ... New best solution
## ... Procrustes: rmse 0.0001252684  max resid 0.0007076332 
## ... Similar to previous best
## Run 20 stress 0.2164165 
## *** Best solution repeated 1 times
nmds_Sapro<-plot_ordination(vst_physeq, vst_pcoa, color="Species", title="NMDS: Saprotrophs", type = "sample")+
  scale_color_manual(values=unique(sample_info_no_rep$color[order(sample_info_no_rep$Species)])) 
grid.arrange(nmds,nmds_ECM, nmds_Sapro,pcoa,  nrow = 2)

Taxonomic summaries

Phylum

#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)

tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]

counts_norm_df<-as.data.frame(vst_trans_count_tab)

# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
 
# color palette
mycol <- c("#A6CEE3" ,"#1F78B4" ,"#B2DF8A" ,"#33A02C" ,"#FB9A99", "#E31A1C" ,"#FDBF6F" ,"#FF7F00" ,"#CAB2D6" ,"#6A3D9A" ,"#FFFF99" ,"#B15928" , "#6fdc8c", "green4", "#004144")


t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Phylum", ntaxa = 15, groupmean = "Species")
Phylum <- t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)

t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Order", ntaxa = 15,  groupmean = "Species")
Order <- t1$plot_bar(bar_full = FALSE,xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)

t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Class", ntaxa = 15, groupmean = "Species")
Class <- t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90, xtext_size = 6, color_values = mycol)

t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Family", ntaxa = 15, groupmean = "Species")
Family<-t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)

t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Genus", ntaxa = 15, groupmean = "Species")
Genus<-t1$plot_bar(bar_full = FALSE, xtext_keep = TRUE, xtext_angle = 90,xtext_size = 6, color_values = mycol)
grid.arrange(Phylum, Order, Class, Family, Genus, nrow = 2)

Class: Per plot per samples type

t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Class", ntaxa = 10)
## The transformed abundance data is stored in object$data_abund ...
t1$plot_bar(bar_type = "notfull", use_alluvium = T, xtext_keep = TRUE, xtext_angle = 90,facet = "Location",xtext_size = 6, color_values = RColorBrewer::brewer.pal(10, "Set3"))
## Warning: The `bar_type` argument of `plot_bar()` is deprecated as of microeco 1.7.0.
## ℹ Please use the `bar_full` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

“-” = not determined

Heat tree

All taxa, all samples

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
row_names <- rownames(counts_norm_df)
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

# Read sample info without technical replicates
sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 

#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy",
                      class_sep = ";")

#remove unclassified taxa 
obj <- filter_taxa(obj, taxon_names != "-")

set.seed(50)
heattree<-heat_tree(obj, node_label = gsub(pattern = "\\[|\\]", replacement = "", taxon_names),
            node_size = n_obs,
            node_size_range = c(0.01, 0.04), 
            node_color = n_obs,
            node_color_range = c("#C9E4CA", "#87BBA2", "#55828B", "#3B6064", "#364958"),
            edge_label = n_obs,
            edge_label_size = 0.015, 
            node_label_size_range = c(0.01, 0.015), 
            node_color_axis_label = "OTUs",
            layout = "davidson-harel", initial_layout = "reingold-tilford")

plot(heattree)

Comparison of sample types (all OTUs)

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates


#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove  "-" (unclassified taxa):
obj <- filter_taxa(obj, taxon_names != "-")

#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Species, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 4 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Species) # What category each sample is assigned to

#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
                  wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0


set.seed(50)
heattree<-heat_tree_matrix(obj, key_size = 0.65,
                 data = "diff_table",
                 node_size = n_obs, # the number of OTUs per taxon
                 node_label_size_range = c(0.01, 0.035), 
                 node_label = taxon_names,
                 node_color = log2_median_ratio, 
                 node_color_range = diverging_palette(), 
                 node_color_trans = "linear", 
                 node_color_interval = c(-2, 2), # The range of `log2_median_ratio` 
                 edge_color_interval = c(-2, 2), # The range of `log2_median_ratio`
                 node_size_axis_label = "Number of OTUs",
                 node_color_axis_label = "Log2 ratio median proportions",
                 layout = "davidson-harel", # The primary layout algorithm
                 initial_layout = "reingold-tilford") # The layout algorithm that initializes node locations

plot(heattree)

Comparison of sample types (minimum 2 OTUs per taxon)

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

sample_data <-read.table("Sample_Info_noR.txt", sep = "\t", header=T, check.names=F)

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates


#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove  "-" (unclassified taxa) and rare taxa:
obj <- filter_taxa(obj, taxon_names != "-")
obj <- filter_taxa(obj, n_obs > 1)


#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 118 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Species, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 4 groups for 118 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Species) # What category each sample is assigned to

#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
                  wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0

set.seed(50)
heattree<-heat_tree_matrix(obj, key_size = 0.65,
                 data = "diff_table",
                 node_size = n_obs, # the number of OTUs per taxon
                 node_label_size_range = c(0.02, 0.04), 
                 node_label = taxon_names,
                 node_color = log2_median_ratio, 
                 node_color_range = diverging_palette(), 
                 node_color_trans = "linear", 
                 node_color_interval = c(-2, 2), # The range of `log2_median_ratio` 
                 edge_color_interval = c(-2, 2), # The range of `log2_median_ratio`
                 node_size_axis_label = "Number of OTUs",
                 node_color_axis_label = "Log2 ratio median proportions",
                 layout = "davidson-harel", # The primary layout algorithm
                 initial_layout = "reingold-tilford") # The layout algorithm that initializes node locations

plot(heattree)

iNEXT package - diversity indices

# I used normalized counts after NTCs treatments and without technical replicates
count_tab <- counts_norm_df[, unlist(lapply(counts_norm_df, is.numeric))] 
row.names(count_tab) <- count_tab$OTU.ID
count_tab <- subset(count_tab, select = -grep("_", names(count_tab))) #delete technical replicates
count_tab <- count_tab > 0

# get OTU incidence data for all plots separately
inc_P <- count_tab[,grepl("P", colnames(count_tab))] 
inc_D <- count_tab[,grepl("D", colnames(count_tab))]
inc_V <- count_tab[,grepl("V", colnames(count_tab))]
inc_S <- count_tab[,grepl("S", colnames(count_tab))]

# create a dataframe with incidence data summed by rows within plots
inc_byspecies <- cbind(rowSums(inc_P), rowSums(inc_D), rowSums(inc_V),
                    rowSums(inc_S))

colnames(inc_byspecies) <- c("Pinus", "Diphasiastrum", "Vaccinium", "Soil")

inc_byspecies <- rbind(c(length(colnames(inc_P)),
                      length(colnames(inc_D)),
                      length(colnames(inc_V)),
                      length(colnames(inc_S))), inc_byspecies)

inc_byspecies<-as.data.frame(inc_byspecies)


# Get the diversity estimates
out <- iNEXT(inc_byspecies, q=c(0,1,2), datatype ="incidence_freq", se = TRUE)

# Plot the diversity estimates

# Sample-size-based rarefaction and extrapolation (R/E) curve (type=1). This curve plots diversity estimates with 95% confidence intervals (if se=TRUE) as a function of sample size up to double the reference sample size, by default, or a user‐specified endpoint:

ggiNEXT(out, type=1, facet.var="Order.q", se = TRUE, color.var="Assemblage")

Hill number = effective number of species/OTUs. Hill numbers determines the measures’ sensitivity to species/OTUs relative abundances. Hill numbers include the three most widely used species diversity measures as special cases: species richness (q = 0), Shannon diversity (q = 1, as the effective number of common species/OTUs in the assemblage) and Simpson diversity (q = 2, effective number of dominant species/OTUs in the assemblage)

Sample completeness curve

ggiNEXT(out, type=2, facet.var="None", color.var="Assemblage", se = TRUE)

Sample completeness curve (type=2) with confidence intervals (if se=TRUE): see Figs. 1b and 2b in Hsieh et al. (2016). This curve plots the sample coverage with respect to sample size for the same range described in (1).

ggiNEXT(out, type=3, facet.var="Order.q", color.var="Assemblage", se =TRUE)

Coverage-based R/E curve (type=3): see Figs. 1c and 2c in Hsieh et al. (2016). This curve plots the diversity estimates with confidence intervals (if se=TRUE) as a function of sample coverage up to the maximum coverage obtained from the maximum size described in (1)

Diversity indices (only Ectomycorrhizal OTUs)

# select only ECM OTUs:
matching_rows <- tax_tab$OTU[tax_tab$Guild %in% c("Ectomycorrhizal")]

# Subset the matrix based on the matching row names
filtered_vs_trans_count_tab <- vst_trans_count_tab[matching_rows, ]
Symbio_df <- as.data.frame(filtered_vs_trans_count_tab) 

#I used normalized counts of reads after removing of OTUs found in NTCs and technical replicates
count_tab <- Symbio_df[, unlist(lapply(Symbio_df, is.numeric))] 
row.names(count_tab) <- count_tab$OTU.ID
count_tab <- subset(count_tab, select = -grep("_", names(count_tab))) #delete technical replicates
count_tab <- count_tab > 0

inc_P <- count_tab[,grepl("P", colnames(count_tab))] # get OTU incidence data for all plots separately
inc_D <- count_tab[,grepl("D", colnames(count_tab))]
inc_V <- count_tab[,grepl("V", colnames(count_tab))]
inc_S <- count_tab[,grepl("S", colnames(count_tab))]

# create a dataframe with incidence data summed by rows within plots
inc_byspecies <- cbind(rowSums(inc_P), rowSums(inc_D), rowSums(inc_V),
                    rowSums(inc_S))

colnames(inc_byspecies) <- c("Pinus", "Diphasiastrum", "Vaccinium", "Soil")

inc_byspecies <- rbind(c(length(colnames(inc_P)),
                      length(colnames(inc_D)),
                      length(colnames(inc_V)),
                      length(colnames(inc_S))), inc_byspecies)

inc_byspecies<-as.data.frame(inc_byspecies)

# Get the diversity estimates
out <- iNEXT(inc_byspecies, q=c(0,1,2), datatype ="incidence_freq", se = TRUE)

# Plot the diversity estimates
ggiNEXT(out, type=1, facet.var="Order.q", se = TRUE, color.var="Assemblage")

Sample-size-based rarefaction and extrapolation (R/E) curve (type=1). This curve plots diversity estimates with 95% confidence intervals (if se=TRUE) as a function of sample size up to double the reference sample size, by default, or a user‐specified endpoint:

Sample completeness curve

ggiNEXT(out, type=2, facet.var="None", color.var="Assemblage", se = TRUE)

Trophic Mode (FunGuild database)

#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)

tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]

# create a data frame from normalized dataset
counts_norm_df<-as.data.frame(vst_trans_count_tab)
row_names <- rownames(counts_norm_df)
counts_norm_df <-as.data.frame(counts_norm_df)
rownames(counts_norm_df) <- row_names

# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
 
t1t <- trans_abund$new(dataset = meco_fungi, taxrank = "Trophic_Mode", ntaxa = 10)
## No taxa_abund list found. Calculate relative abundance with cal_abund function ...
## The result is stored in object$taxa_abund ...
## The transformed abundance data is stored in object$data_abund ...
scolor<-c("darkorange1","dodgerblue3","hotpink2", "purple4")
Trophic_Mode <- t1t$plot_box(group = "Species", xtext_angle = 30, color_values = scolor) + ylab("Relative abundance (%)")
Trophic_Mode

Statistical comparison of Saprotrophic OTUs abundancy

abund_t<-t1t$data_abund

shapiro.test(abund_t[abund_t$Taxonomy == "Saprotroph",]$Abundance) # data normally distributed
## 
##  Shapiro-Wilk normality test
## 
## data:  abund_t[abund_t$Taxonomy == "Saprotroph", ]$Abundance
## W = 0.98623, p-value = 0.5848
bartlett.test(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Saprotroph",]) # variances are equal
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Abundance by Species
## Bartlett's K-squared = 5.5208, df = 3, p-value = 0.1374
TukeySapro <- TukeyHSD(aov(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Saprotroph",]))
TukeySapro
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ Species, data = abund_t[abund_t$Taxonomy == "Saprotroph", ])
## 
## $Species
##                              diff        lwr        upr     p adj
## Pinus-Diphasiastrum     -4.043536 -6.9354675 -1.1516037 0.0024935
## Soil-Diphasiastrum      -1.281917 -4.1738493  1.6100145 0.6501675
## Vaccinium-Diphasiastrum -2.397722 -5.2896541  0.4942098 0.1384604
## Soil-Pinus               2.761618 -0.1303138  5.6535501 0.0665675
## Vaccinium-Pinus          1.645813 -1.2461185  4.5377454 0.4447497
## Vaccinium-Soil          -1.115805 -4.0077367  1.7761272 0.7413522

Statistical comparison of Symbiotroph abundancy

abund_t<-t1t$data_abund

shapiro.test(abund_t[abund_t$Taxonomy == "Symbiotroph",]$Abundance) # data not normally distributed
## 
##  Shapiro-Wilk normality test
## 
## data:  abund_t[abund_t$Taxonomy == "Symbiotroph", ]$Abundance
## W = 0.92837, p-value = 0.0003521
bartlett.test(Abundance ~ Species, abund_t[abund_t$Taxonomy == "Symbiotroph",]) # variances are not equal
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Abundance by Species
## Bartlett's K-squared = 9.9411, df = 3, p-value = 0.01907
kruskal_effsize(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species )
## # A tibble: 1 × 5
##   .y.           n effsize method  magnitude
## * <chr>     <int>   <dbl> <chr>   <ord>    
## 1 Abundance    76   0.172 eta2[H] large
res.kruskal <-kruskal_test(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species)
res.kruskal 
## # A tibble: 1 × 6
##   .y.           n statistic    df       p method        
## * <chr>     <int>     <dbl> <int>   <dbl> <chr>         
## 1 Abundance    76      15.4     3 0.00153 Kruskal-Wallis
pwc<-dunn_test(abund_t[abund_t$Taxonomy == "Symbiotroph",], Abundance ~ Species, p.adjust.method = "bonferroni") 
pwc
## # A tibble: 6 × 9
##   .y.       group1     group2    n1    n2 statistic       p   p.adj p.adj.signif
## * <chr>     <chr>      <chr>  <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
## 1 Abundance Diphasias… Pinus     19    19     3.86  1.12e-4 6.69e-4 ***         
## 2 Abundance Diphasias… Soil      19    19     1.37  1.70e-1 1   e+0 ns          
## 3 Abundance Diphasias… Vacci…    19    19     1.65  9.84e-2 5.90e-1 ns          
## 4 Abundance Pinus      Soil      19    19    -2.49  1.28e-2 7.66e-2 ns          
## 5 Abundance Pinus      Vacci…    19    19    -2.21  2.70e-2 1.62e-1 ns          
## 6 Abundance Soil       Vacci…    19    19     0.279 7.80e-1 1   e+0 ns

Guild (FunGuild database)

#read info table with the samples without replicates
sample_info_no_rep <- read.table("Sample_Info_noR.txt", header=T, row.names=1, check.names=F)

tax_table<-tax_tab
row.names(tax_table) <- tax_table$OTU
tax_table <- tax_table[, -1]
tax_table<- tax_table[order(rownames(tax_table)),]

# create a data frame from normalized data
counts_norm_df<-as.data.frame(vst_trans_count_tab)
row_names <- rownames(counts_norm_df)
counts_norm_df <-as.data.frame(counts_norm_df)
rownames(counts_norm_df) <- row_names

# create microeco file
meco_fungi <- microtable$new(sample_table = sample_info_no_rep, otu_table = counts_norm_df, tax_table = tax_table)
 
t1 <- trans_abund$new(dataset = meco_fungi, taxrank = "Guild", ntaxa = 15)
## No taxa_abund list found. Calculate relative abundance with cal_abund function ...
## The result is stored in object$taxa_abund ...
## The transformed abundance data is stored in object$data_abund ...
Guilds <- t1$plot_box(group = "Species", xtext_angle = 30, color_values = scolor) + ylab("Relative abundance (%)") 
Guilds

Statistical comparison of Ectomycorrhizal OTUs abundancy

abund<-t1$data_abund

shapiro.test(abund[abund$Taxonomy == "Ectomycorrhizal",]$Abundance) # data not normally distributed
## 
##  Shapiro-Wilk normality test
## 
## data:  abund[abund$Taxonomy == "Ectomycorrhizal", ]$Abundance
## W = 0.96117, p-value = 0.02019
bartlett.test(Abundance ~ Species, abund[abund$Taxonomy == "Ectomycorrhizal",]) # variances are not equal
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Abundance by Species
## Bartlett's K-squared = 11.005, df = 3, p-value = 0.0117
kruskal_effsize(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species )
## # A tibble: 1 × 5
##   .y.           n effsize method  magnitude
## * <chr>     <int>   <dbl> <chr>   <ord>    
## 1 Abundance    76   0.101 eta2[H] moderate
res.kruskal <-kruskal_test(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species)
res.kruskal
## # A tibble: 1 × 6
##   .y.           n statistic    df      p method        
## * <chr>     <int>     <dbl> <int>  <dbl> <chr>         
## 1 Abundance    76      10.3     3 0.0162 Kruskal-Wallis
pwc<-dunn_test(abund[abund$Taxonomy == "Ectomycorrhizal",], Abundance ~ Species, p.adjust.method = "bonferroni") 
pwc
## # A tibble: 6 × 9
##   .y.       group1      group2    n1    n2 statistic       p  p.adj p.adj.signif
## * <chr>     <chr>       <chr>  <int> <int>     <dbl>   <dbl>  <dbl> <chr>       
## 1 Abundance Diphasiast… Pinus     19    19     3.13  0.00175 0.0105 *           
## 2 Abundance Diphasiast… Soil      19    19     1.88  0.0600  0.360  ns          
## 3 Abundance Diphasiast… Vacci…    19    19     1.18  0.240   1      ns          
## 4 Abundance Pinus       Soil      19    19    -1.25  0.212   1      ns          
## 5 Abundance Pinus       Vacci…    19    19    -1.95  0.0507  0.304  ns          
## 6 Abundance Soil        Vacci…    19    19    -0.705 0.481   1      ns

-> Pinus has more ECM than Diphasiastrum

Statistical comparison of Saprotroph abundancy

shapiro.test(abund[abund$Taxonomy == "Saprotroph",]$Abundance) # data normally distributed
## 
##  Shapiro-Wilk normality test
## 
## data:  abund[abund$Taxonomy == "Saprotroph", ]$Abundance
## W = 0.96708, p-value = 0.04557
bartlett.test(Abundance ~ Species, abund[abund$Taxonomy == "Saprotroph",]) # variances are equal
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Abundance by Species
## Bartlett's K-squared = 11.619, df = 3, p-value = 0.008808
TukeySapro <- TukeyHSD(aov(Abundance ~ Species, abund[abund$Taxonomy == "Saprotroph",], correction =))
## Warning: In lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) :
##  extra argument 'correction' will be disregarded
TukeySapro
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Abundance ~ Species, data = abund[abund$Taxonomy == "Saprotroph", ], correction = )
## 
## $Species
##                                 diff        lwr        upr     p adj
## Pinus-Diphasiastrum      0.187951791 -4.4001312  4.7760347 0.9995482
## Soil-Diphasiastrum      -4.998432579 -9.5865155 -0.4103496 0.0273615
## Vaccinium-Diphasiastrum -0.003747485 -4.5918304  4.5843355 1.0000000
## Soil-Pinus              -5.186384370 -9.7744673 -0.5983014 0.0204558
## Vaccinium-Pinus         -0.191699276 -4.7797822  4.3963837 0.9995207
## Vaccinium-Soil           4.994685094  0.4066021  9.5827681 0.0275181

-> Soil has more Saprotrophs than other sample types

Comparison of the samples with different Pinus tree age and tree height

Preparation of datasets

#dataset only for Pinus:
tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

# Select all samples ans OTU column:
columns_to_keep <- c(TRUE, !grepl("[_]", names(df)[-1]))
df_all <- df[, columns_to_keep, drop = FALSE] 


sample_info_no_R <- sample_info_no_rep
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 

All_table<-Tax_otu_data

#write.table(All_table, file = "OTU_count_DIPH.txt", append = FALSE, quote = FALSE, sep = " ",
           # eol = "\n", na = "NA", dec = ".", row.names = FALSE,
           # col.names = TRUE, qmethod = c("escape", "double"),
           # fileEncoding = "")

Pinus trees age distribution

sample_info_no_R <- read.table("Sample_Info_noR.txt", header=T,  check.names=F)
Age<-sort(sample_info_no_R$Age)

plot(Age)

sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < 100, "young", 
                                     ifelse(sample_info_no_R$Age < 110, "average", "old"))

Heattrees to compare taxa abundancy between different tree age groups (All samples types)

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

sample_data <- sample_info_no_R

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates


#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")


#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Age_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 3 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Age_group) # What category each sample is assigned to

#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
                  wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0

obj$data$diff_table
## # A tibble: 1,023 × 7
##    taxon_id treatment_1 treatment_2 log2_median_ratio median_diff mean_diff
##    <chr>    <chr>       <chr>                   <dbl>       <dbl>     <dbl>
##  1 ab       average     old                         0     -30.7    -39.1   
##  2 ac       average     old                         0     -22.3    -11.5   
##  3 ad       average     old                         0     -11.7     -7.56  
##  4 ae       average     old                         0     -15.2     -8.33  
##  5 af       average     old                         0      -6.99    -6.23  
##  6 ag       average     old                         0       0       -0.296 
##  7 ah       average     old                         0      -0.151   -0.596 
##  8 ai       average     old                         0      -5.40    -4.22  
##  9 ak       average     old                         0       0       -0.0455
## 10 al       average     old                         0       0       -0.0458
## # ℹ 1,013 more rows
## # ℹ 1 more variable: wilcox_p_value <dbl>
range(obj$data$diff_table$wilcox_p_value, finite = TRUE) #check if there are significant differences
## [1] 0.09840729 1.00000000

No significant difference

Comparison of old and young trees (only Pinus samples)

sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < mean(sample_info_no_R$Age), "young", "old" )
sample_info_no_R$Age_group <- ifelse(sample_info_no_R$Age < 100, "young", "old" )

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

# Select only Pinus samples:
columns_to_keep <- c(TRUE, !grepl("[DVS]", names(df)[-1]))  # Including the first column
df_P <- df[, columns_to_keep, drop = FALSE]  # Subset dataframe with selected columns

sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_P, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates

#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")


#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Age_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 19 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Age_group) # What category each sample is assigned to

#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
                  wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.3429964 1.0000000

No significant differences

Compare taxa abundancy between groups with different tree height

Tree height distribuion

Height<-sort(sample_info_no_R$Height)
plot(Height)

Comparison of high and small trees (all species samples together)

sample_info_no_R$Height_group <- ifelse(sample_info_no_R$Height < mean(sample_info_no_R$Height), "small", "big" )

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

sample_data <- sample_info_no_R

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates

#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")

#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 76 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Height_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 76 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Height_group) # What category each sample is assigned to
obj <- mutate_obs(obj, "diff_table", wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.5460561 1.0000000

No significant differences

Comparison of samples with high and small trees (only Pinus samples)

sample_info_no_R$Height_group <- ifelse(sample_info_no_R$Height < mean(sample_info_no_R$Height), "small", "big" )

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df2<-counts_norm_df
rownames(counts_norm_df2) <- NULL
df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

# Select only Pinus samples ans OTU column:
columns_to_keep <- c(TRUE, !grepl("[DVS_]", names(df)[-1]))
df_P <- df[, columns_to_keep, drop = FALSE] 

sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Pinus",]


# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_P, tax_data, by = "OTU" ) 
Tax_otu_data  <- subset(Tax_otu_data , select = -grep("_", names(Tax_otu_data ))) #delete technical replicates

#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")


#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = sample_data$Sample)
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Height_group, cols = sample_data$Sample)
## Calculating number of samples with a value greater than 0 for 19 columns in 2 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = sample_data$Sample, # What columns of sample data to use
                                      groups = sample_data$Height_group) # What category each sample is assigned to
obj <- mutate_obs(obj, "diff_table", wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr"))
obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0
range(obj$data$diff_table$wilcox_p_value, finite = TRUE)
## [1] 0.1323042 1.0000000

-> No significant differences

Comparison of Diphasiastum samples with different colony sizes

Preparation of the dataset

tax_data <-tax_tab
tax_data <- tax_data[tax_data$OTU %in% rownames(filt_count_tab), ] # delete the blank OTUs

counts_norm_df<-as.data.frame(vst_trans_count_tab)
counts_norm_df2<-counts_norm_df
row_names <- rownames(counts_norm_df)
rownames(counts_norm_df2) <- NULL
counts_norm_df<-as.data.frame(vst_trans_count_tab)

df <- cbind(OTU= row_names, counts_norm_df2) # Add row names as the first column

# Select only Diphasiastrum samples and OTU column:
columns_to_keep <- c(TRUE, !grepl("[SPV_]", names(df)[-1]))
df_D <- df[, columns_to_keep, drop = FALSE] 

sample_info_no_R <- sample_info_no_rep
sample_data <- sample_info_no_R
sample_data <- sample_info_no_R[sample_info_no_R$Species == "Diphasiastrum",]

# Combine the tables sorting by OTU
Tax_otu_data <- left_join(df_D, tax_data, by = "OTU" ) 

D_table<-Tax_otu_data[,1:20]

Diphasiastrum colony size distribution

sample_info_no_R <- read.table("Sample_Info_noR.txt", header=T,  check.names=F)
colony<-sort(sample_info_no_R$Diph_colony)

plot(colony)

sample_info_no_R$Diph_colony <- ifelse(sample_info_no_R$Diph_colony < 10, "small", 
                                     ifelse(sample_info_no_R$Diph_olony < 15, "average", "big"))

Comparison of big and small Diphasiastrum colonies

#converting to taxmap format
obj <- parse_tax_data(Tax_otu_data,
                      class_cols = "taxonomy", # The column in the input table
                      class_sep = ";") # What each taxon is separated by

#remove unclassified taxa:
obj <- filter_taxa(obj, taxon_names != "-")


#For the heat tree comparison of abundances
obj$data$tax_abund <- calc_taxon_abund(obj, "tax_data", cols = rownames(sample_data))
## Summing per-taxon counts from 19 columns for 341 taxa
obj$data$tax_occ <- calc_n_samples(obj, "tax_abund", groups = sample_data$Diph_colony, cols = rownames(sample_data))
## Calculating number of samples with a value greater than 0 for 19 columns in 16 groups for 341 observations
obj$data$diff_table <- compare_groups(obj, data= "tax_abund", 
                                      cols = rownames(sample_data), # What columns of sample data to use
                                      groups = sample_data$Diph_colony) # What category each sample is assigned to

#We then need to correct for multiple comparisons and set non-significant differences to zero:
obj <- mutate_obs(obj, "diff_table",
                  wilcox_p_value = p.adjust(wilcox_p_value, method = "fdr")) #false discovery rate


obj$data$diff_table$log2_median_ratio[obj$data$diff_table$wilcox_p_value > 0.05] <- 0

range(obj$data$diff_table$wilcox_p_value, finite = TRUE) #check if there are significant differences
## [1] 1 1

No significant differences